package com.example.demo.service.timing;

import org.springframework.scheduling.annotation.SchedulingConfigurer;
import org.springframework.scheduling.config.ScheduledTaskRegistrar;
import org.springframework.scheduling.support.CronTrigger;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.Selectable;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * WebMagic page processor that is also registered as a dynamically scheduled task.
 *
 * <p>On every scheduled run the task re-reads a cron expression from {@link #CRON_FILE_PATH},
 * then crawls {@link #START_URL} and prints the list entries found on that page. The cron
 * expression read from the file determines when the following run is triggered.
 */
@Component
public class XpathSpiderDynamicTimingService implements PageProcessor, SchedulingConfigurer {

    private static final Logger LOGGER =
            Logger.getLogger(XpathSpiderDynamicTimingService.class.getName());

    /** Cron expression used until a valid one has been read from {@link #CRON_FILE_PATH}. */
    private static final String DEFAULT_CRON = "*/5 * * * * ?";

    /** File whose content is re-read on every run and used as the next cron expression. */
    private static final String CRON_FILE_PATH = "D:\\webmagic\\time.txt";

    /** Page that is crawled on every scheduled run. */
    private static final String START_URL = "http://www.ccpit-henan.org/mcxw/index.jhtml";

    /** Timestamp layout for the "task started" console message. */
    private static final DateTimeFormatter TIMESTAMP_FORMAT =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /** Crawl configuration: cycle-retry count of 3 and a sleep time of 1000 ms between fetches. */
    private final Site site = Site.me()
            .setCycleRetryTimes(3)
            .setSleepTime(1000);

    /**
     * Extracts every {@code li} below the {@code box-content} div and prints its date, title,
     * link and abstract to standard output.
     *
     * <p>Links are only printed; they are not queued as follow-up requests.
     *
     * @param page the downloaded page handed over by the spider
     */
    @Override
    public void process(Page page) {
        Html html = page.getHtml();
        // All <li> elements nested inside <div class="box-content">.
        List<Selectable> items = html.xpath("//div[@class=box-content]//li").nodes();
        for (Selectable item : items) {
            // Link: href attribute of the <a> element.
            String href = item.xpath("//a//@href").get();
            // Title: text of the <span> element.
            String title = item.xpath("//span//text()").get();
            // Publication date: text of the <em> element.
            String date = item.xpath("//em//text()").get();
            // Abstract: text of the <p> element.
            String abstracts = item.xpath("//p//text()").get();
            System.out.println(date + " " + title + " " + href + " " + abstracts);
        }
    }

    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Reads a text file and returns its lines concatenated without line separators.
     *
     * <p>The file is decoded as UTF-8 and is always closed before this method returns.
     * I/O failures are logged rather than propagated; whatever was read before the failure
     * is returned.
     *
     * @param file the file to read; may be {@code null}
     * @return the concatenated lines, or an empty string when {@code file} is {@code null},
     *         is not a regular file, or could not be read at all
     */
    public String readFromFile(File file) {
        StringBuilder content = new StringBuilder();
        // isFile() already implies that the path exists.
        if (file == null || !file.isFile()) {
            return content.toString();
        }
        // try-with-resources closes the whole reader chain, including the underlying stream.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
            System.out.println("读取结果：" + content);
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "Failed to read " + file, e);
        }
        return content.toString();
    }

    /**
     * Registers the crawl as a trigger task whose cron expression can change between runs.
     *
     * <p>The first run is scheduled with {@link #DEFAULT_CRON}. Each run re-reads
     * {@link #CRON_FILE_PATH} before crawling; a valid expression found there is used to
     * compute the next execution time.
     *
     * @param taskRegistrar the registrar the trigger task is added to
     */
    @Override
    public void configureTasks(ScheduledTaskRegistrar taskRegistrar) {
        AtomicReference<String> period = new AtomicReference<>(DEFAULT_CRON);
        taskRegistrar.addTriggerTask(
                // 1. Task body (Runnable)
                () -> {
                    refreshCron(period);
                    try {
                        System.out.println("执行动态定时任务: " + LocalDateTime.now().format(TIMESTAMP_FORMAT));
                        // Reuse this bean as the processor instead of building a copy per run.
                        Spider.create(this)
                                .addUrl(START_URL)
                                .thread(5)
                                .run();
                    } catch (Exception e) {
                        // Boundary catch: a failed crawl must not escape into the scheduler.
                        LOGGER.log(Level.SEVERE, "Scheduled crawl failed", e);
                    }
                },
                // 2. Next execution time (Trigger); period only ever holds a validated expression.
                triggerContext -> new CronTrigger(period.get()).nextExecutionTime(triggerContext)
        );
    }

    /**
     * Replaces the expression in {@code period} with the content of {@link #CRON_FILE_PATH},
     * but only when that content is a cron expression {@link CronTrigger} accepts.
     *
     * <p>A missing, empty or malformed file leaves the previous expression in place, so the
     * trigger callback never has to parse an invalid expression.
     */
    private void refreshCron(AtomicReference<String> period) {
        String candidate = readFromFile(new File(CRON_FILE_PATH)).trim();
        // Drop a UTF-8 byte-order mark that some editors write at the start of the file.
        if (candidate.startsWith("\uFEFF")) {
            candidate = candidate.substring(1).trim();
        }
        if (candidate.isEmpty()) {
            return;
        }
        try {
            // The constructor parses the expression and rejects invalid ones.
            new CronTrigger(candidate);
            period.set(candidate);
        } catch (IllegalArgumentException e) {
            LOGGER.log(Level.WARNING,
                    "Ignoring invalid cron expression '" + candidate + "'; keeping '" + period.get() + "'", e);
        }
    }
}
